Load libraries

library(magrittr)
library(forecast)
library(ggplot2)
library(tseries)

Read-in Data

# Read one hourly data:
DFHour <- read.csv("videoHourly02.csv",header=TRUE,sep=",")
# Calculate Percent Lost
DFHour$packtsPcnt <- (DFHour$pcktsLost/DFHour$pcktsCount)*100
DFAnalysis <- DFHour[,c(4,2,3,5)]
head(DFAnalysis)
##               dteTime pcktsCount pcktsLost packtsPcnt
## 1 2017-01-08 00:00:00     986.58      6.12  0.6203248
## 2 2017-01-08 01:00:00     984.47      7.09  0.7201845
## 3 2017-01-08 02:00:00     984.64      6.62  0.6723269
## 4 2017-01-08 03:00:00     997.94      6.20  0.6212798
## 5 2017-01-08 04:00:00     987.24      5.38  0.5449536
## 6 2017-01-08 05:00:00     976.99      5.71  0.5844482

Convert to Time Series

pckt_data_full <- DFAnalysis$packtsPcnt%>%ts(start=c(0,1),frequency = 24)

# Keep only first 21 days
pckt_data <- pckt_data_full%>%subset.ts(end=21*24)
# Create Test Set for one day in the future
pckt_data_test <- pckt_data_full%>%subset.ts(start=21*24+1, end =  22*24)

pckt_data%>%autoplot() + ylab("Percent Packet Lost (%)") + xlab("Time (Days/Hours)")

adf.test(pckt_data)
## Warning in adf.test(pckt_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  pckt_data
## Dickey-Fuller = -7.8037, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
?kpss.test
kpss.test(pckt_data, null="Trend")
## Warning in kpss.test(pckt_data, null = "Trend"): p-value greater than
## printed p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  pckt_data
## KPSS Trend = 0.029491, Truncation lag parameter = 5, p-value = 0.1

Explore sesonality of the data

pckt_data%>%ggsubseriesplot() + xlab("Hour of the day") + ylab("Percent Packet Lost (%)")

pckt_data%>%ggPacf() +ggtitle("Autocorrelation for Percent Package Loss Series")

pckt_data%>%Box.test(type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  .
## X-squared = 46.655, df = 1, p-value = 8.466e-12
?Box.test
ggsubseriesplot(pckt_data)

ggseasonplot(pckt_data%>%subset(end=10*24))

library(TSA)
## Loading required package: leaps
## Loading required package: locfit
## locfit 1.5-9.1    2013-03-22
## Loading required package: mgcv
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
## 
##     getResponse
## This is mgcv 1.8-15. For overview type 'help("mgcv-package")'.
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:forecast':
## 
##     fitted.Arima, plot.Arima
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
pdg <- periodogram(pckt_data)

pdg$spec%>%order
##   [1] 154  76  73 184  93 120 182   5  92 115 181 125 157 253 200 130 197
##  [18] 201 250 139  81  70 198  84 153  36  45  59  13 161 105 183 109 143
##  [35] 244 104 138 169 171 159 237  62 148 117  82   9 137   4 205  74 188
##  [52] 226  63  89 179  72 133 165 174   8 151 194 218 172  80  44  60  88
##  [69] 202 224  68 248 208 212 167 106 251  94  46 203  99 170 163  97 209
##  [86] 166 103  33 155  16 135 186 232 162  91 204 222 102  24 127 142 123
## [103] 229 134  61 195 131 246 221 118 219 199  25  77  95 238  26 243 101
## [120] 207 121  12 158 216 177  57  67 233 241  34 242  51  32  27   1  10
## [137]  50 113  71  65 191 254 156 152 247 140  83  17  69 178 187  96 245
## [154] 240  39 217  15 164  38  19 235  37 114 175 211 122 180  54 129  11
## [171] 116 228 234 252 236 173  35 110 215 220 146  87  75  90 239 136 160
## [188] 249 144 132  30 231  41  49 189 126  23 196  47 112 223 185 119   3
## [205] 100 107  78 124 230 206  66  55  29   2  40  56  52 256 210 108  53
## [222] 145  79 150  20   6  98 147 128  28  58 227 225 141 190 168 214  14
## [239] 176 111  48 193  18 192 255  86  42  31  43 213  22 149  64   7  85
## [256]  21
1/pdg$freq[21]
## [1] 24.38095
1/pdg$freq[85]
## [1] 6.023529
1/pdg$freq[7]
## [1] 73.14286
1/pdg$freq[64]
## [1] 8

Model Fitting

Seasonal Naive

fitnaive <- snaive(pckt_data)
fcnaive <- forecast(fitnaive,h=24)
fcnaive%>%autoplot() + ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)

accuracy(fcnaive,pckt_data_test)
##                        ME      RMSE       MAE        MPE     MAPE     MASE
## Training set  0.007383779 0.2914955 0.2311604  -6.039159 30.68733 1.000000
## Test set     -0.145475086 0.3528613 0.2632881 -31.596214 43.99839 1.138985
##                   ACF1 Theil's U
## Training set 0.1871620        NA
## Test set     0.1325135    1.2661
checkresiduals(fitnaive)

## 
##  Ljung-Box test
## 
## data:  residuals
## Q* = 306.47, df = 48, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 48

ETS Model

fitexp <- ets(pckt_data)
summary(fitexp)
## ETS(M,N,M) 
## 
## Call:
##  ets(y = pckt_data) 
## 
##   Smoothing parameters:
##     alpha = 0.0756 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 0.6628 
##     s=1.0464 1.0166 0.9161 1.4182 1.2196 1.1259
##            0.8928 1.2173 1.3941 1.1501 1.31 1.1657 0.7631 0.6679 0.673 0.8169 0.7968 0.9682 0.8509 0.8214 0.815 0.9997 1.0404 0.9139
## 
##   sigma:  0.241
## 
##      AIC     AICc      BIC 
## 1502.065 1505.242 1616.075 
## 
## Training set error measures:
##                       ME      RMSE       MAE       MPE   MAPE      MASE
## Training set 0.005692413 0.1988161 0.1563488 -5.167803 20.679 0.6763651
##                    ACF1
## Training set 0.02061415
?ets
fcets <- fitexp%>%forecast(h=24)
fcets%>%autoplot() +ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)

accuracy(fcets,pckt_data_test)
##                        ME      RMSE       MAE        MPE     MAPE
## Training set  0.005692413 0.1988161 0.1563488  -5.167803 20.67900
## Test set     -0.146498059 0.2319476 0.1920514 -27.910633 32.14154
##                   MASE        ACF1 Theil's U
## Training set 0.6763651  0.02061415        NA
## Test set     0.8308148 -0.00436959  1.046091
checkresiduals(fitexp)

## 
##  Ljung-Box test
## 
## data:  residuals
## Q* = 93.432, df = 22, p-value = 8.871e-11
## 
## Model df: 26.   Total lags used: 48
?Box.test

ARIMA Models

BoxCox.lambda(pckt_data)
## [1] -0.3036466
autoplot(pckt_data)

pckt_data%>%BoxCox(-0.303)%>%autoplot

ARIMA Models

arimafit <- auto.arima(pckt_data)
summary(arimafit)
## Series: pckt_data 
## ARIMA(1,0,1)(2,0,0)[24] with non-zero mean 
## 
## Coefficients:
##          ar1      ma1    sar1    sar2    mean
##       0.7348  -0.5507  0.2789  0.2204  0.8018
## s.e.  0.0909   0.1120  0.0435  0.0464  0.0320
## 
## sigma^2 estimated as 0.05205:  log likelihood=29.27
## AIC=-46.54   AICc=-46.37   BIC=-21.2
## 
## Training set error measures:
##                       ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.001963561 0.2270144 0.1780557 -7.828116 23.97321 0.7702693
##                     ACF1
## Training set -0.01080923
checkresiduals(arimafit)

## 
##  Ljung-Box test
## 
## data:  residuals
## Q* = 77.309, df = 43, p-value = 0.001028
## 
## Model df: 5.   Total lags used: 48
fcarima<- arimafit%>% forecast(h=24)
fcarima%>%autoplot() + ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)

accuracy(fcarima, pckt_data_test)
##                        ME      RMSE       MAE        MPE     MAPE
## Training set  0.001963561 0.2270144 0.1780557  -7.828116 23.97321
## Test set     -0.115330883 0.2503180 0.1998090 -27.143646 34.65398
##                   MASE        ACF1 Theil's U
## Training set 0.7702693 -0.01080923        NA
## Test set     0.8643741  0.15197010  1.051514

TBATS

fitbats <- tbats(pckt_data)
fctbats <- forecast(fitbats, h=24) 
fctbats%>%autoplot() + ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)

accuracy(f = fctbats, pckt_data_test)
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set  0.02633079 0.2050179 0.1574748  -2.629472 20.20505 0.6812360
## Test set     -0.11678586 0.1989356 0.1582125 -23.103819 27.08748 0.6844277
##                     ACF1 Theil's U
## Training set -0.02784862        NA
## Test set      0.03971624 0.8779549
checkresiduals(fitbats)

## 
##  Ljung-Box test
## 
## data:  residuals
## Q* = 109.82, df = 29, p-value = 2.474e-11
## 
## Model df: 19.   Total lags used: 48